/********************************US patents with EP or foreign parallel applications********************************
Main purpose: identify US patents with EP or foreign parallel applications
datasets:
tls201_part1-9: all patent filings, obtained from PATSTAT (table 201)
tls201_us: all patents filed at the USPTO, extracted from tls201_part1-9 (PATSTAT table 201)
tls201_ep: all patents filed at the EPO, extracted from tls201_part1-9 (PATSTAT table 201)
us9803: patent applications filed at the USPTO from 1998 through 2003
us_ep_twin: US-EP twins, constructed from PATSTAT 2017 data
********************************US patents with EP or foreign parallel applications********************************/


* Open the Table 1 log. An unnamed log left open by an earlier (e.g. aborted)
* run would make -log using- stop with an error, so close it first; -capture-
* makes this a no-op when no log is open.
capture log close
log using "$DATA\tabl1_log.txt", replace
**********************Table 1: Sample Count for US patents with EP parallel applications*************************
* EP applications filed between 1jul1996 and 30jun2005, i.e. the 1998-2003
* US sample window widened by 18 months on each side. Only the application id,
* the DOCDB family id and the filing date are kept, renamed with an _ep suffix
* so they do not clash with the US-side variables when joined later.
tempfile ep
use "$DATA\tls201_ep", clear
keep if appln_filing_date>=td(1jul1996) & appln_filing_date<=td(30jun2005)
keep appln_id docdb_family_id appln_filing_date
rename appln_id appid_ep
rename appln_filing_date appdate_ep
save `ep', replace


* Table 1, US side: start from the 1998-2003 USPTO applications, keep granted
* patents, attach the DOCDB family id and match to EP family members.
use "$DATA\us9803", clear //us9803_public.dta cannot replicate the statistics before excluding pending/abandoned patent applications in Table 1.

* Row 1: U.S. applications filed between Jan. 1, 1998 and Dec. 31, 2003
count

* Row 2: exclude ungranted US applications (abandoned or pending)
drop if dpatent != 1
count

* Attach the DOCDB family id from the US extract of PATSTAT table 201
drop if missing(appln_id)
joinby appln_id using "$DATA\tls201_us", unmatched(master)
tabulate _merge
drop if _merge != 3 //only 2 unmatched
keep appln_id docdb_family_id dpatent appln_filing_date
rename appln_filing_date appdate_us
rename appln_id appln_id_us

* Pair every US patent with the EP applications of the same family
joinby docdb_family_id using `ep', unmatched(master)
tabulate _merge
drop if _merge != 3 //(845,904 observations deleted)
drop _merge

* Row 3: exclude US patents that don't match to EP equivalents.
* Displays (distinct US patents + US-EP pairs) / distinct US patents.
distinct appln_id_us
local n_uspat = r(ndistinct)
local n_pairs = r(N)
display (`n_uspat' + `n_pairs') / `n_uspat'


//Require EP filings are non-PCT and filed w/in 18mo of US equivalent
* Count the US rows (uspto==1) and the remaining rows (uspto==0) of the
* US-EP twin file and display (US + EP) / US.
* The counters are named n_us and n_ep on purpose: storing the EP count in a
* local called ep would overwrite the macro that holds the EP tempfile name
* created earlier in this do-file.
use "$DATA\us_ep_twin", clear
count if uspto==1
local n_us = r(N)
count if uspto==0
local n_ep = r(N)
display (`n_us' + `n_ep') / `n_us'
log close





**********************Figure D1: time trend of EP or foreign parallel applications*************************
* Families of granted US patents filed 1998-2003 that have more than one
* family member, one row per (family id, US filing date) combination.
tempfile usfam
use "$DATA\tls201_us", clear
keep if grant == 1 ///
    & inrange(appln_filing_date, td(1jan1998), td(31dec2003)) ///
    & docdb_family_size > 1
keep docdb_family_id appln_filing_date
rename appln_filing_date appdate_us
duplicates drop
save `usfam', replace

* Collect non-US family members of the US families in usfam that were filed
* within 550 days (about 18 months) of the US filing date.
* Only parts 01-08 of PATSTAT table 201 are read; the original note says there
* are 9 parts in total and that tls201_part09 holds appln_id>900000000
* (presumably skipped on purpose - confirm).
forvalues j = 1/8 {
    use "$EP\tls201_part0`j'", clear
    * non-US applications inside the widest possible date window
    keep if appln_auth != "US"
    keep if inrange(appln_filing_date, td(1jul1996), td(30jun2005))
    * pair each foreign application with the US filing dates of its family
    joinby docdb_family_id using `usfam'
    rename appln_filing_date appdate_for
    rename appln_id appid_for
    * 18 month lead-lag restriction
    keep if inrange(appdate_for - appdate_us, -550, 550)
    keep appid_for appdate_for docdb_family_id
    * from the second part onwards, stack the rows accumulated so far
    if `j' > 1 {
        append using "$DATA\app9803us_for"
    }
    saveold "$DATA\app9803us_for", replace
}
* The data in memory now hold the rows from all parts
duplicates drop //124K dropped
label data "foreign fam member that can be matched to us member"
saveold "$DATA\app9803us_for", replace

	 
* Distinct family ids that have at least one matched foreign member
tempfile usfor
use docdb_family_id using "$DATA\app9803us_for", clear
duplicates drop
label data "us appid with foreign parallel app"
save `usfor', replace

	 
* EP applications filed 1jul1996-30jun2005, reduced to id, family id and
* filing date and renamed with an _ep suffix.
tempfile ep
use "$DATA\tls201_ep", clear
keep if inrange(appln_filing_date, td(1jul1996), td(30jun2005))
keep appln_id docdb_family_id appln_filing_date
rename (appln_id appln_filing_date) (appid_ep appdate_ep)
save `ep', replace

* Distinct family ids of granted 1998-2003 US patents that have an EP family
* member filed within 550 days (about 18 months) of the US filing date.
tempfile ep2
use "$DATA\tls201_us", clear
keep if grant==1
keep if appln_filing_date>=td(1jan1998) & appln_filing_date<=td(31dec2003)
keep if docdb_family_size>1
keep docdb_family_id appln_filing_date
duplicates drop
* Name the US date explicitly, mirroring the foreign-application step
rename appln_filing_date appdate_us
joinby docdb_family_id using `ep'
*restricted to 18 month time lead-lag
* Use the full variable name appdate_ep: the former reference to appdate only
* resolved through variable abbreviation, which breaks under
* -set varabbrev off- or as soon as a second appdate* variable exists.
keep if abs(appdate_us-appdate_ep)<=550
keep docdb_family_id
duplicates drop
save `ep2', replace



* Granted US patents filed 1998-2003, flagged for having an EP parallel
* filing (eppri) and for having any foreign parallel filing (forpri).
use "$DATA\tls201_us", clear
keep if grant == 1 & inrange(appln_filing_date, td(1jan1998), td(31dec2003))

* EP flag: the family appears in the EP-matched family list
joinby docdb_family_id using `ep2', unmatched(master)
tabulate _merge //29% matched
generate eppri = _merge == 3
drop _merge

* Foreign flag: the family appears in the foreign-matched family list
joinby docdb_family_id using `usfor', unmatched(master)
tabulate _merge
generate forpri = _merge == 3
drop _merge

* Monthly and quarterly filing periods
generate month = mofd(appln_filing_date)
generate qtr = qofd(appln_filing_date)
format month %tm
format qtr %tq


* Figure D1: monthly share of US patents with an EP parallel filing (left
* axis) and with any foreign parallel filing (right axis). The data are
* reduced to one row per month inside preserve/restore, so the patent-level
* data are back in memory afterwards.
preserve
egen epmean = mean(eppri), by(month)
egen formean = mean(forpri), by(month)
bysort month: keep if _n == 1
* xline(490): month is a %tm value (months since 1960m1), so 490 is 2000m11
twoway ///
    (scatter epmean month, ///
        lcolor(gray) lwidth(1) msymbol(plus) lpattern(dash)) ///
    (scatter formean month, ///
        lcolor(black) lwidth(1) yaxis(2) msymbol(circle_hollow) mcolor(gray)) ///
    , ///
    xline(490, lcolor(gray) lpattern("shortdash") lwidth(thick)) ///
    title("U.S. Patents with EP or Foreign Parallel Applications", siz(med)) ///
    legend(region(color(white)) c(3) order(1 "W/ EP parallel filings" 2 "W/ foreign parallel filings--right axis")) ///
    xtitle("") ///
    ytitle("", axis(1)) ///
    ytitle("", axis(2)) ///
    yla(0.28(0.005)0.315, nogrid glc(gs14) axis(1)) ///
    yla(0.51(0.01)0.58, nogrid glc(gs14) axis(2)) ///
    plotregion(fcolor(white)) ///
    graphregion(color(white)) ///
    bgcolor(white)
graph export "$DATA\us_for.eps", replace
restore




